[USER (data scientist)]:Hi Tapilot, I need your help to analyze the credit_customers dataset. What is the distribution of age, employment status, and credit history among our customers in the credit_customers dataset? Please provide mean, median, mode, range and distribution plots for age, employment status, and credit history.

[YOU (AI assistant)]:Sure, let's start by calculating the descriptive statistics for age, employment status, and credit history. Then, we'll create distribution plots for each of these variables.
'''
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as sns 
import pickle 
  
# Load the dataset
credit_customers = pd.read_csv("credit_customers.csv")


def save_pickle(obj, path):
    """Serialize ``obj`` to ``path`` with pickle.

    The file is opened in a ``with`` block so the handle is flushed and
    closed as soon as the dump finishes, rather than being left open.
    """
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)


# Descriptive statistics for age
age = credit_customers['age']
age_mean = age.mean()
age_median = age.median()
# mode() returns a Series (there may be ties); keep the first entry
age_mode = age.mode()[0]
age_range = age.max() - age.min()

print(f"Age - Mean: {age_mean}, Median: {age_median}, Mode: {age_mode}, Range: {age_range}")
save_pickle(age_mean, "./ref_result/age_mean.pkl")
save_pickle(age_median, './ref_result/age_median.pkl')
save_pickle(age_mode, './ref_result/age_mode.pkl')
save_pickle(age_range, './ref_result/age_range.pkl')

# Descriptive statistics for employment status (categorical: frequency table)
employment_counts = credit_customers['employment'].value_counts()

print("\nEmployment Status Distribution:")
print(employment_counts)
save_pickle(employment_counts, "./ref_result/employment_counts.pkl")

# Descriptive statistics for credit history (categorical: frequency table)
credit_history_counts = credit_customers['credit_history'].value_counts()

print("\nCredit History Distribution:")
print(credit_history_counts)
save_pickle(credit_history_counts, "./ref_result/credit_history_counts.pkl")

# Distribution plot for age
plt.figure(figsize=(10, 6))
sns.histplot(age, kde=True, bins=20)
plt.title("Age Distribution")
plt.xlabel("Age")
plt.ylabel("Frequency")
# Save before show(): the figure is written to disk while it is still current
plt.savefig('ref_result/histplot.png')
plt.show()

# Distribution plot for employment status
plt.figure(figsize=(10, 6))
# Reuse the counts computed above so the bars follow the printed table's order
sns.countplot(x='employment', data=credit_customers, order=employment_counts.index)
plt.title("Employment Status Distribution")
plt.xlabel("Employment Status")
plt.ylabel("Frequency")
plt.savefig('ref_result/countplot.png')
plt.show()

# Distribution plot for credit history
plt.figure(figsize=(10, 6))
# Reuse the counts computed above so the bars follow the printed table's order
sns.countplot(x='credit_history', data=credit_customers, order=credit_history_counts.index)
plt.title("Credit History Distribution")
plt.xlabel("Credit History")
plt.ylabel("Frequency")
plt.xticks(rotation=45)
plt.savefig('ref_result/countplot_2.png')
plt.show()
'''

[USER (data scientist)]: What is the percentage difference between the number of customers in the most common employment category and the second most common employment category?
My code snippet template is:

---BEGIN CODE TEMPLATE---

import pandas as pd  
import pickle
  
# Load the dataset  
credit_customers = pd.read_csv("credit_customers.csv")  
  
# YOUR SOLUTION BEGIN:
<code1>
[COMPLETE YOUR CODE]   
</code1>
# YOUR SOLUTION END

# The solution section above is expected to define `percentage_difference`;
# it is formatted below with two decimal places, so it should be numeric.
# NOTE(review): the message hard-codes the "1 to 4 years" and "7 or more years"
# categories - presumably the two most common employment categories in this
# dataset; confirm against the data.
print(f"The percentage difference between customers with 1 to 4 years of employment and those with 7 or more years of employment is {percentage_difference:.2f}%.")  

# save data
# NOTE(review): the file object passed to pickle.dump is never explicitly closed.
pickle.dump(percentage_difference,open("./pred_result/percentage_difference.pkl","wb"))

---END CODE TEMPLATE---

[YOU (AI assistant)]:Sure, here is the code:
'''
import pandas as pd  
import pickle
  
# Load the dataset  
credit_customers = pd.read_csv("credit_customers.csv")  
  
# YOUR SOLUTION BEGIN:
